######################
## Data analysis for
## climate change conspiracy
## paper with J. Uscinski
##
## Last modified: March 4 2016.
###########################
set.seed(831213)

library(MASS)
library(foreign)
library(basicspace)
library(car)
library(plyr)
library(lattice)
library(latticeExtra)
library(doMC)
library(stargazer)

setwd("~/Dropbox/Issues Positions and Conspiracy Dispositions")

ziptemps  <- read.csv("data/ZipcodeTemps81-10.csv",colClasses=c("factor","numeric"))
names(ziptemps)[1]  <- "lookupzip"
consp.data  <- read.spss("data/CCES12_MIA_OUTPUT_20130621.sav"
                         ,to.data.frame=TRUE)
consp.data$lookupzip  <- substr(consp.data$lookupzip,1,5)
consp.data$faminc  <-  as.factor(recode(as.numeric(consp.data$faminc)
                                        ,"1:3='<30k';4:6='30k-60k';7:11='60k-150k';12:18='>150k';else=NA"))
consp.data$faminc  <- relevel(consp.data$faminc,ref="<30k")
consp.data$MIA388_ab1  <- ifelse(is.na(consp.data$MIA388_a_1),consp.data$MIA388_b_4,consp.data$MIA388_a_1)
consp.data$MIA388_ab2  <- ifelse(is.na(consp.data$MIA388_a_2),consp.data$MIA388_b_3,consp.data$MIA388_a_2)
consp.data$MIA388  <- ifelse(consp.data$MIA388_ab1==1|consp.data$MIA388_ab2==1,1,0)

##Common questions
common_q <- c( "V101"
               ,"V102"
               ,"lookupzip"
               ,"faminc"
               ,"educ"
               ,"race"
               ,"gender"
               ,"birthyr"
               ,"inputstate"
               ,"pew_religimp" #Importance of religion
               ,"CC334A" #Ideology
               ,"MIA382" #trust the Gov.
               ,"MIA383" #trust the Gov.
               ,"MIA384" #Trust the media
               ,'pid7' #partyid
               ,"pid3" 
               ,"CC350" #Party registration 
               ,"newsint"
               ,"MIA374","MIA379","MIA380","MIA381" #Conspiracy items
               ,"MIA375_1","MIA375_2","MIA375_3","MIA375_4","MIA375_7","MIA375_8","MIA375_9","MIA375_10","MIA375_11"#Conspiracy groups
)

##Similar questions:
##Goal is to have larger values indicate more liberal positions
##or, if dichotomous, make liberal positon the non-reference
similar_q <- c("CC332I" #ACA 2010 (make oppose reference)
               ,"CC332H" #Keystone (make support reference) 
               ,"CC332G" #Repeal Obamacare (make support reference)
               ,"CC414_6" # UN (make no reference)
               ,"MIA367" #Photo ID (reverse order, so oppose is larger)
               ,"CC302" #Economic outlook (reverse order, and put not sure with stayed about the same )
               #,"CC302b" #Economic responsibility
               ,"CC305" #Mistake to invade Iraq (make no reference)
               ,"CC306" #Mistake to invade Afghanistan (make no reference)
               ,"CC308b" #Obama approval (reverse order, so approve is larger)
               #,"CC308b" #Congress approval
               ,"CC320" #Gun control (make order Less Strict, Kept As They Are, More Strict)
               ,"CC321" #Climate change (reverse order)
               ,"CC322_2" #Border patrol (make yes the reference)
) 

##Different questions:
different_q <- c("CC324" #Abortion (higher values are more liberal)
                 ,"CC326" #Gay marriage (make oppose reference)
                 ,"CC327" #Affirmative action (reverse order)
) 

## Data processing
consp.data.sub <- consp.data[,c(common_q,similar_q,different_q)]
which_few <- names(table(consp.data.sub$inputstate))[which(table(consp.data.sub$inputstate)<3)]
consp.data.sub <- subset(consp.data.sub,!inputstate%in%which_few)
consp.data.sub$inputstate <-consp.data.sub$inputstate[,drop=TRUE] 
#Race: white?
consp.data.sub$race  <- consp.data.sub$race=="White"
#Obtain temperatures
#consp.data.sub  <- consp.data[complete.cases(consp.data.sub),]
consp.data.sub  <- merge(consp.data.sub,ziptemps)


#PID processing
consp.data.sub$pid3[consp.data.sub$pid3=="Not sure"]  <- "Independent " 
consp.data.sub$pid7[consp.data.sub$pid7=="Not sure"]  <- "Independent" 

consp.data.sub$pid3  <- relevel(consp.data.sub$pid3,ref="Independent ")
consp.data.sub  <- subset(consp.data.sub,pid3!="Other ")
consp.data.sub$pid3  <- consp.data.sub$pid3[,drop=TRUE]

#Ideology
consp.data.sub$CC334A[consp.data.sub$CC334A=="Not Sure"]  <- "Middle of the Road"
consp.data.sub$Ideol <- cut(as.numeric(consp.data.sub$CC334A),c(0,3,4,7)
                            ,labels=c("Liberal","Centrist","Conservative"))

##Question processing: reorder levels so higher/non-reference cats are more liberal
consp.data.sub$MIA367  <- factor(consp.data.sub$MIA367,levels=rev(levels(consp.data.sub$MIA367)))
consp.data.sub$CC302[consp.data.sub$CC302=="Not sure"]  <- "Stayed about the same"
consp.data.sub$CC302 <- consp.data.sub$CC302[,drop=TRUE]
consp.data.sub$CC302  <- factor(consp.data.sub$CC302,levels=rev(levels(consp.data.sub$CC302)))
consp.data.sub$CC305  <- factor(consp.data.sub$CC305,levels=c("No","Not Sure","Yes"))
consp.data.sub$CC306  <- factor(consp.data.sub$CC306,levels=c("No","Not Sure","Yes"))
consp.data.sub$CC308b[consp.data.sub$CC308b=="Not Sure"]  <- "Somewhat Approve"
consp.data.sub$CC308b <- consp.data.sub$CC308b[,drop=TRUE]
consp.data.sub$CC308b  <- factor(consp.data.sub$CC308b,levels=rev(levels(consp.data.sub$CC308b)))
consp.data.sub$CC320  <- factor(consp.data.sub$CC320,levels=c("Less Strict","Kept As They Are","More Strict"))
consp.data.sub$CC321  <- factor(consp.data.sub$CC321,levels=rev(levels(consp.data.sub$CC321)))
consp.data.sub$CC322_2 <- relevel(consp.data.sub$CC322_2,ref="Yes")
consp.data.sub$CC326 <- relevel(consp.data.sub$CC326,ref="Oppose")
consp.data.sub$CC327  <- factor(consp.data.sub$CC327,levels=rev(levels(consp.data.sub$CC327)))
consp.data.sub$CC332I <- relevel(consp.data.sub$CC332I,ref="Oppose")
consp.data.sub$CC332H <- relevel(consp.data.sub$CC332H,ref="Support")
consp.data.sub$CC332G <- relevel(consp.data.sub$CC332G,ref="Support")
consp.data.sub$CC414_6 <- relevel(consp.data.sub$CC414_6,ref="No")
consp.data.sub$MIA384 <- factor(consp.data.sub$MIA384,c(levels(consp.data.sub$MIA384)[6],levels(consp.data.sub$MIA384)[-6]))



######################################
##Estimate IRT

irt.data.consp  <- subset(consp.data.sub,select=c(MIA374
                                                 #,MIA375_8
                                                 #,MIA375_10
                                                 ,MIA379
                                                 ,MIA380
                                                 ,MIA381
                                                 ,MIA382
                                                 ,MIA383
                                                 ,MIA384
                                                 ))
data.whoconsp  <- subset(consp.data.sub,select=c(MIA375_1
                                                    ,MIA375_2
                                                    ,MIA375_3
                                                    ,MIA375_4
                                                    #,MIA375_7
                                                    ,MIA375_9
                                                    ,MIA375_11
))

irt.consp <- grm(irt.data.consp, IRT.param = TRUE)
consp.data.sub$consp.bb <- factor.scores(irt.consp, irt.data.consp)$score.dat$z1

whoconsp  <- ifelse(data.whoconsp$MIA375_11=="Yes","None"
                      ,ifelse((data.whoconsp$MIA375_3=="Yes"|data.whoconsp$MIA375_9=="Yes"|data.whoconsp$MIA375_4=="Yes")&(data.whoconsp$MIA375_2=="No"&data.whoconsp$MIA375_1=="No"),"Liberals"
                       ,ifelse((data.whoconsp$MIA375_1=="Yes"|data.whoconsp$MIA375_2=="Yes")&(data.whoconsp$MIA375_3=="No"&data.whoconsp$MIA375_9=="No"&data.whoconsp$MIA375_4=="No"),"Conservatives"
                              ,"Other")))
consp.data.sub$consp.bb.who <- factor(whoconsp,levels=c("Other","None","Liberals","Conservatives"))

###################################
consp.data.sub$pid3 <- factor(consp.data.sub$pid3
                              , levels=levels(consp.data.sub$pid3)[c(2,1,3)])
par(mfrow=c(1,2))
hist(consp.data.sub$consp.bb
     , main=""
     ,ylab=""
     ,xlab="Cospiratorial Predisposition",axes=FALSE)
axis(1)
boxplot(consp.bb ~ pid3, data=consp.data.sub, box=FALSE
        ,xlab="Conspiratorial Predisposition"
        ,frame=FALSE
        ,horizontal=TRUE
        ,las=1)

###################################

pred_fun <- function(predictor_vals,model){
  new_data_temp <- model.frame(model)[,-1]
  preds <- adply(predictor_vals,1,function(x){
    new_data <- new_data_temp
    new_data$consp.bb <- x$consp.bb
    #new_data$consp.bb.who <- factor(x$consp.bb.who, levels=levels(new_data$consp.bb.who))
    new_data$pid3 <- factor(x$pid3,levels=levels(new_data$pid3))
    #new_data$Ideol <- factor(x$Ideol,levels=levels(new_data$Ideol))
    new.des.mat <- model.matrix(~pid3
                                *consp.bb
                                *consp.bb.who
                                +faminc
                                +pew_religimp
                                +educ
                                +race
                                +gender
                                +birthyr
                                +aug_normal
                                +inputstate
                                +newsint
                                ,new_data)

    consp.vcov <- vcov(model)
    if(any(grepl("\\|",colnames(consp.vcov))))
    {
      
      tau.index  <- grep("\\|",colnames(consp.vcov)) #Avoid sampling thresholds in ordered models
      sim.coefs <- mvrnorm(500,coef(model),consp.vcov[-tau.index,-tau.index])
      new.des.mat <- new.des.mat[,-1] #Remove intercept from ordered models
      etas  <- new.des.mat%*%t(sim.coefs)
      taus <-  c(model$zeta[1],rev(model$zeta)[1])

    }else{
      sim.coefs <- mvrnorm(500,coef(model),consp.vcov)
      etas  <- new.des.mat%*%t(sim.coefs)
      taus <- c(0,0)
    }
    

    #prob_cons <- pnorm(taus[1],etas)
    prob_lib <- pnorm(taus[2],etas,lower.tail = FALSE)
    #prob_cons <- pnorm(taus[1],etas)
    
    #odds_lib <- log(prob_lib/(1-prob_lib))
    # 
    # prob_lib <- aaply(prob_lib,1,quantile, probs=c(0.025,0.5,0.975))
    # odds_lib <- aaply(odds_lib,1,quantile, probs=c(0.025,0.5,0.975))
    # prob_lib <- colMeans(prob_lib)
    # odds_lib <- colMeans(odds_lib)
    # return(prob_lib)
  })
  preds
}


##Ordered/regular probits
model_est <- function(name,who_model=TRUE,print=FALSE)
{
  outcome <- with(consp.data.sub
                  ,switch(name
                          ,"Climate Change"=CC321
                          ,"Affordable Care Act"=CC332I
                          ,"Keystone Pipeline"=CC332H
                          ,"Repeal ACA"=CC332G
                          ,"Back UN Efforts"=CC414_6
                          ,"Require Photo ID"=MIA367
                          ,"Economy is doing well"=CC302
                          ,"Mistake to Invade Iraq"=CC305
                          ,"Mistake to Invade Afgh."=CC306
                          ,"Pres. Obama is doing well"=CC308b
                          ,"Gun Control"=CC320
                          ,"Increase Border Patrol"=CC322_2
                          ,"Abortion"=CC324
                          ,"Gay Marriage"=CC326
                          ,"Affirmative Action"=CC327))
  if(length(levels(outcome))>2)
  {
    model_res <- polr(outcome~pid3
                      *consp.bb
                      *consp.bb.who
                      +faminc
                      +pew_religimp
                      +educ
                      +race
                      +gender
                      +birthyr
                    +aug_normal
                  +inputstate
                      +newsint
                      ,method="probit"
                      ,Hess=TRUE
                      ,data=consp.data.sub)
  }else{
    model_res <- glm(outcome~pid3
                     *consp.bb
                    *consp.bb.who
                     + faminc
                     +pew_religimp
                     +educ
                     +race
                     +gender
                     +birthyr
                     +aug_normal
                     +inputstate
                     +newsint
                     ,family=binomial("probit")
                     ,data=consp.data.sub)
  }
  if(print)
  {
    stargazer(model_res,type="html",omit=c("state"),report=c("vc*s"),star.cutoffs=c(0.95,NA,NA),out="table.htm",out.header=TRUE,ord.intercepts=TRUE)
    print(summary(model_res))
  }
  #Get difference in lowest vs. highest categories
  #As combination changes.

  consp.bb.who <- c("Liberals", "Conservatives")
  #consp.bb.who <- c("Liberals")
  consp.bb <- seq(min(consp.data.sub$consp.bb,na.rm=TRUE)
                  ,max(consp.data.sub$consp.bb,na.rm=TRUE)
                  ,length.out=2)
  if(who_model)
    pid3 <- c("Independent ")
  else  
    pid3 <- c("Democrat ","Independent ", "Republican ")
  Ideol <- c("Centrist")
  all.sim.data <- expand.grid(consp.bb.who,consp.bb,pid3)
  names(all.sim.data) <- c("consp.bb.who","consp.bb","pid3")

  
  
  results <- adply(all.sim.data,1,.fun=pred_fun,model=model_res)
  
  results_diff <- ddply(results, ifelse(who_model,"consp.bb.who","pid3")
                            ,function(x){
                              sub_low <- subset(x,consp.bb==min(consp.bb),select=-c(consp.bb.who,consp.bb,pid3))
                              sub_high <- subset(x,consp.bb==max(consp.bb),select=-c(consp.bb.who,consp.bb,pid3))
                              res <- sub_high-sub_low 
                              if(who_model)
                                res <- res - 0.15 #offset correction
                              res <- colMeans(res)
                              res <- quantile(res,probs=c(0.1,0.5,0.9))
                            }) 
  results_diff$model <- name
  names(results_diff)[c(2:4)] <- c("Low","Median","High")
  return(results_diff)
  
}

all_models_similar <-  c("Climate Change"
                         ,"Affordable Care Act"
                         ,"Increase Border Patrol"
                         ,"Require Photo ID"
                         ,"Mistake to Invade Iraq"
                         ,"Economy is doing well"
                         ,"Gun Control"
                         ,"Affirmative Action"
)

all_models_diff <-c("Abortion"
                         ,"Keystone Pipeline"
                    ,"Gay Marriage"
                         ,"Mistake to Invade Afgh."
                    #,"Pres. Obama is doing well"
                    #,"Economy is doing well"
)

registerDoMC(3)
climate_change <- model_est("Climate Change",FALSE,TRUE)

system.time(all_preds_similar <- ldply(all_models_similar
                                       ,.fun=model_est
                                       ,.parallel = TRUE))


system.time(all_preds_diff <- ldply(all_models_diff,.fun=model_est,.parallel = TRUE))
names(all_preds_diff)[c(2:4)] <- c("Low","Median","High")

#####Plots & tables

#### Partisanship and Climate change

xyplot(Median~pid3
       ,layout=c(1,1)
       #,groups=consp.bb.who
       #,groups=Party
       #,index.cond=list(c(2,1))
       ,data=climate_change
       ,hi=climate_change$High
       ,lo=climate_change$Low
       ,main="Climate Change"
       #,scales=list(y="free")
       #,scales=list(x=list(at=c(min(consp.data.sub$consp.bb)
       #                          ,max(consp.data.sub$consp.bb))
       #                     ,labels=c("Low","High")))
       #,xlab="Conspiratorial Predisposition"
       ,xlab="Partisanship"
       ,ylab="Effect of Conspiracism on Probability\n of Accepting Scientific Consensus"
       ,par.settings=standard.theme("pdf", color=FALSE)
       ,prepanel=function(x,y,subscripts,hi,lo,...){
         list(ylim=c(min(lo[subscripts]),max(hi[subscripts])))
       }
       ,panel=function(x,y,subscripts,hi,lo,...){
         # panel.polygon(c(x,rev(x)),c(hi[subscripts],rev(lo[subscripts]))
         #               ,col=gray(0.7,0.7)
         #               ,border=gray(0.7,0.7))
         panel.arrows(x0=x,y0=c(lo[subscripts])
                      ,x1=x,y1=(hi[subscripts])
                      ,lwd=2,code=3,angle=90
                      ,length=0.05)
         panel.xyplot(x,y,type='p'
                      ,lty=1
                      ,lwd=2
                      ,cex=0.7
         )
         panel.abline(h=0,lty=3,col=gray(0.7,0.7))
       }
)

#### Independents and different issues
xyplot(Median~consp.bb.who|model
       ,layout=c(4,2)
       
                                 #,groups=consp.bb.who
                                 #,groups=Party
                                 #,index.cond=list(c(2,1))
                                 ,data=all_preds_similar
                                 ,hi=all_preds_similar$High
                                 ,lo=all_preds_similar$Low
       #,scales=list(y="free")
                                 #,scales=list(x=list(at=c(min(consp.data.sub$consp.bb)
                                #                          ,max(consp.data.sub$consp.bb))
                                #                     ,labels=c("Low","High")))
                                 #,xlab="Conspiratorial Predisposition"
                                ,xlab="Who Conspires?"
                                 ,ylab="Effect of Conspiracism on \n Probability of Liberal Position"
                                 ,par.settings=standard.theme("pdf", color=FALSE)
                                  ,prepanel=function(x,y,subscripts,hi,lo,...){
                                    list(ylim=c(min(lo[subscripts]),max(hi[subscripts])))
                                  }
                                 ,panel=function(x,y,subscripts,hi,lo,...){
                                   # panel.polygon(c(x,rev(x)),c(hi[subscripts],rev(lo[subscripts]))
                                   #               ,col=gray(0.7,0.7)
                                   #               ,border=gray(0.7,0.7))
                                   panel.arrows(x0=x,y0=c(lo[subscripts])
                                                  ,x1=x,y1=(hi[subscripts])
                                                  ,lwd=2,code=3,angle=90
                                                ,length=0.05)
                                   panel.xyplot(x,y,type='p'
                                                ,lty=1
                                                ,lwd=2
                                                ,cex=0.7
                                                )
                                   panel.abline(h=0,lty=3,col=gray(0.7,0.7))
                                 }
)
#pred.vals.plot
# useOuterStrips(pred.vals.plot_similar)
# 
xyplot(Median~consp.bb.who|model
       #,groups=consp.bb.who
       #,groups=Party
       #,index.cond=list(c(2,1))
       ,layout=c(4,1)
       ,data=all_preds_diff
       ,hi=all_preds_diff$High
       ,lo=all_preds_diff$Low
       #,scales=list(y="free")
       #,scales=list(x=list(at=c(min(consp.data.sub$consp.bb)
       #                          ,max(consp.data.sub$consp.bb))
       #                     ,labels=c("Low","High")))
       #,xlab="Conspiratorial Predisposition"
       ,xlab="Who Conspires?"
       ,ylab="Effect of Conspiracism on \n Probability of Liberal Position"
       ,par.settings=standard.theme("pdf", color=FALSE)
       ,prepanel=function(x,y,subscripts,hi,lo,...){
         list(ylim=c(min(lo[subscripts]),max(hi[subscripts])))
       }
       ,panel=function(x,y,subscripts,hi,lo,...){
         # panel.polygon(c(x,rev(x)),c(hi[subscripts],rev(lo[subscripts]))
         #               ,col=gray(0.7,0.7)
         #               ,border=gray(0.7,0.7))
         panel.arrows(x0=x,y0=c(lo[subscripts])
                      ,x1=x,y1=(hi[subscripts])
                      ,lwd=2,code=3,angle=90
                      ,length=0.05)
         panel.xyplot(x,y,type='p'
                      ,lty=1
                      ,lwd=2
                      ,cex=0.7
         )
         panel.abline(h=0,lty=3,col=gray(0.7,0.7))
       }
)  

## Does conspiracism predict which way independents lean?
lean_data <- subset(consp.data.sub,pid3=="Independent "&consp.bb.who!="Other"&consp.bb.who!="None",drop=TRUE)
lean_data$pid7 <- lean_data$pid7[,drop=TRUE]
lean_data$consp.bb.who <- lean_data$consp.bb.who[,drop=TRUE]
lean_model <- polr(pid7~poly(consp.bb,1,raw=TRUE)
                  *consp.bb.who
                   +faminc
                   +pew_religimp
                   +educ
                   +race
                   +gender
                   +birthyr
                   ,method="probit"
                   ,Hess=TRUE
                   ,data=lean_data)
summary(lean_model)

sim_bb <- seq(min(lean_data$consp.bb)
              ,max(lean_data$consp.bb)
              ,length.out=100)
pred.df_lib <- data.frame(consp.bb=sim_bb
                      ,consp.bb.who="Liberals"
                      ,faminc="60k-150k"
                      ,pew_religimp="Very important"
                      ,educ="Some college"
                      ,race=TRUE
                      ,gender="Male"
                      ,birthyr=1960)
pred.df_con <- data.frame(consp.bb=sim_bb
                          ,consp.bb.who="Conservatives"
                          ,faminc="60k-150k"
                          ,pew_religimp="Very important"
                          ,educ="Some college"
                          ,race=TRUE
                          ,gender="Male"
                          ,birthyr=1960)
pred_lib <- as.data.frame(predict(lean_model,pred.df_lib,"probs"))
pred_lib$who <- "Liberals"
pred_lib$consp.bb <- sim_bb
pred_con <- as.data.frame(predict(lean_model,pred.df_con,"probs"))
pred_con$who <- "Conservatives"
pred_con$consp.bb <- sim_bb
all_preds_lean <- rbind(pred_lib,pred_con)
all_preds_lean[,2] <- all_preds_lean[,1]+all_preds_lean[,2]
all_preds_lean[,3] <- all_preds_lean[,2]+all_preds_lean[,3]

xyplot(Independent~consp.bb|who
       ,data=all_preds_lean
       ,l_d = all_preds_lean[,1]
       ,l_i = all_preds_lean[,2]
       ,l_r = all_preds_lean[,3]
       ,index.cond=list(c(2,1))
       ,xlim=c(-0.5,0.55)
       ,ylim=c(0,1)
       ,par.settings=standard.theme("pdf", color=FALSE)
       ,xlab="Conspiracism"
       ,ylab="Probability of Leaning"
       ,panel=function(x,y,subscripts,l_d,l_r,l_i,...){
         panel.polygon(c(x,rev(x))
                       ,c(l_d[subscripts],rep(0,100))
                       ,col="gray40"
                       ,border="white")
         panel.polygon(c(x,rev(x))
                       ,c(l_i[subscripts],rev(l_d[subscripts]))
                       ,col="gray60"
                       ,border="white")
         panel.polygon(c(x,rev(x))
                       ,c(rep(1,100),rev(l_i[subscripts]))
                       ,col="gray80"
                       ,border="white")
       })

